Load data

here::set_here()
## File .here already exists in /Users/koenvandenberge/Dropbox/research/brainStat/hbcRegenGithub/scripts
suppressPackageStartupMessages({
  library(slingshot)
  # devtools::install_github("statOmics/tradeSeq@2c8b916cbc1a16289a6d625f39ab67c2b283e959")
  library(tradeSeq)
  library(SingleCellExperiment)
  library(cowplot)
  library(rgl)
  library(clusterExperiment)
  library(RColorBrewer)
  library(aggregation)
  library(ggplot2)
  library(pheatmap)
  library(wesanderson)
  library(UpSetR)
  library(gridExtra)
  library(scales)
})

Import data

sds <- readRDS("../data/finalTrajectory/sling.rds")
counts <- readRDS("../data/finalTrajectory/counts_noResp_noMV.rds")
counts <- round(counts)
sce <- readRDS("../data/finalTrajectory/sce_tradeSeq20200904.rds")
timePoint <- readRDS("../data/finalTrajectory/timePoint_noResp_noMV.rds")
clDatta <- readRDS("../data/finalTrajectory/dattaCl_noResp_noMV.rds")

Chronological timepoints

plot3d(reducedDim(sds), aspect = 'iso', col=brewer.pal(9,'Set1')[timePoint], alpha=.6)

df <- data.frame(UMAP1=reducedDim(sds)[,1],
                 UMAP2=reducedDim(sds)[,2],
                 UMAP3=reducedDim(sds)[,3],
                 time=factor(timePoint),
                 col=brewer.pal(9,'Set1')[timePoint])

ggplot(df, aes(x=UMAP1, y=UMAP2, col=time)) +
  geom_point(size=.2, alpha=.5) +
  scale_color_manual(values = brewer.pal(9,'Set1')) +
  theme_classic() +
  ggtitle("Cells colored by sampled timepoint") +
  guides(colour = guide_legend(override.aes = list(alpha = 1, size=1.5)))

# ggsave("../plots/timePointDR.png", width=9)


ggplot(df, aes(x=UMAP1, y=UMAP3, col=time)) +
  geom_point(size=.2, alpha=.5) +
  scale_color_manual(values = brewer.pal(9,'Set1')) +
  theme_classic() +
  ggtitle("Cells colored by sampled timepoint") +
  guides(colour = guide_legend(override.aes = list(alpha = 1, size=1.5))) +
  coord_fixed()

ggsave("../plots/timePointDR_dim13.png")
## Saving 7 x 5 in image

3D plot of trajectory

plot3d(reducedDim(sds), aspect = 'iso', col=brewer.pal(9,'Set1')[timePoint], alpha=.6)
plot3d.SlingshotDataSet(sds, add=TRUE, col=c("black"), lwd=4)

plot3d(reducedDim(sds), aspect = 'iso', col=brewer.pal(9,'Set1')[clDatta], alpha=.6)
plot3d.SlingshotDataSet(sds, add=TRUE, col=c("black"), lwd=4)


plot3d(reducedDim(sds), aspect = 'iso', col=brewer.pal(9,'Set1')[clDatta], alpha=.3,
       box=FALSE)
plot3d.SlingshotDataSet(sds, add=TRUE, col=c("black"), lwd=6)



plot3d(reducedDim(sds), aspect = 'iso', col=brewer.pal(9,'Set1')[clDatta], alpha=.3,
       box=FALSE, axes = FALSE, xlab = "UMAP1", ylab="UMAP2", zlab="UMAP3")
plot3d.SlingshotDataSet(sds, add=TRUE, col=c("black"), lwd=5)
axis3d('x', at = NULL, labels = FALSE, tick = TRUE, line = 0, 
    pos = NULL, nticks = 5) 
axis3d('y', at = NULL, labels = FALSE, tick = TRUE, line = 0, 
    pos = NULL, nticks = 5) 
axis3d('z', at = NULL, labels = FALSE, tick = TRUE, line = 0, 
    pos = NULL, nticks = 5)
# rgl.postscript("../plots/tmp_trajectory.pdf","pdf")

2D plots of trajectory with cell types

# pairs(reducedDim(sds), col=brewer.pal(9,'Set1')[clDatta], pch=16, cex=1/2)

dims <- c(1,2)
plot(reducedDim(sds)[,dims], col=alpha(brewer.pal(9,'Set1')[clDatta], .6), 
     pch=16, cex=1/2, main="Dims 1 and 2")
for(ii in seq_along(slingCurves(sds))){
    c <- slingCurves(sds)[[ii]]
    lines(c$s[c$ord, dims], lwd = 3, col = "black")
}

dims <- c(1,3)
plot(reducedDim(sds)[,dims], col=alpha(brewer.pal(9,'Set1')[clDatta], .6), 
     pch=16, cex=1/2, main="Dims 1 and 3")
for(ii in seq_along(slingCurves(sds))){
    c <- slingCurves(sds)[[ii]]
    lines(c$s[c$ord, dims], lwd = 3, col = "black")
}

dims <- c(2,3)
plot(reducedDim(sds)[,dims], col=alpha(brewer.pal(9,'Set1')[clDatta], .6), 
     pch=16, cex=1/2, main="Dims 1 and 3")
for(ii in seq_along(slingCurves(sds))){
    c <- slingCurves(sds)[[ii]]
    lines(c$s[c$ord, dims], lwd = 3, col = "black")
}

Marker genes

library(ggplot2)
markers <- c("Krt5", "Krt14",
             "Trp63", "Sprr1a", #need 2 good HBC* markers here.
             "Cyp2g1", "Cyp1a2",
             "Ascl1", "Neurod1",
             "Omp") #, "Syt1")

dims <- c(1,3)
rd <- reducedDims(sds)[,dims]

markerPlotList <- list()
for(mm in 1:length(markers)){
  y <- log1p(counts[markers[mm],])
  df <- data.frame(dim1=rd[,1],
                   dim2=rd[,2],
                   y=y)
  markerPlotList[[mm]] <- ggplot(df, aes(x=dim1, y=dim2, colour=y)) +
    geom_point(alpha=.3, size=1/2) +
    scale_color_gradient(low = "gray82", high = "steelblue4") +
    theme_classic() +
    ggtitle(markers[mm]) +
    theme(legend.position = "none") +
    xlab("UMAP1") +
    ylab("UMAP3") +
    coord_fixed()
}


pMarker <- cowplot::plot_grid(plotlist = markerPlotList)
pMarker

Differential expression: most significantly increasing genes in each lineage

getUpGenes <- function(yhatDf, lineage){
  yhatDf1 <- yhatDf[yhatDf$lineage == lineage,]
  upGenes <- yhatDf1 %>% 
    group_by(gene) %>%
    summarize(up = yhat[20] > yhat[1]) %>%
    filter(up == TRUE)
  return(upGenes)
}
asTestRes <- associationTest(sce, lineages = TRUE)
# saveRDS(asTestRes, file = "../data/asTestRes_fig1.rds")

## check which genes are increasing
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✔ tibble  2.1.3     ✔ dplyr   1.0.3
## ✔ tidyr   1.0.2     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## ✔ purrr   0.3.3
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ readr::col_factor() masks scales::col_factor()
## ✖ dplyr::collapse()   masks IRanges::collapse()
## ✖ dplyr::combine()    masks gridExtra::combine(), Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count()      masks matrixStats::count()
## ✖ dplyr::desc()       masks IRanges::desc()
## ✖ purrr::discard()    masks scales::discard()
## ✖ tidyr::expand()     masks S4Vectors::expand()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::first()      masks S4Vectors::first()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce()     masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename()     masks S4Vectors::rename()
## ✖ purrr::simplify()   masks DelayedArray::simplify()
## ✖ dplyr::slice()      masks IRanges::slice()
yhatDf <- predictSmooth(sce, gene=names(sce), nPoints=20)
upGenesNeur <- getUpGenes(yhatDf, lineage=1)
upGenesSus <- getUpGenes(yhatDf, lineage=2)
upGenesHBC <- getUpGenes(yhatDf, lineage=3)

## order according to test statistic
asTestResNeurIncreas <- asTestRes[rownames(asTestRes) %in% upGenesNeur$gene,]
asTestResSusIncreas <- asTestRes[rownames(asTestRes) %in% upGenesSus$gene,]
asTestResHBCIncreas <- asTestRes[rownames(asTestRes) %in% upGenesHBC$gene,]

ooNeur <- order(asTestResNeurIncreas$waldStat_1, decreasing=TRUE)
ooSus <- order(asTestResSusIncreas$waldStat_2, decreasing=TRUE)
ooHBC <- order(asTestResHBCIncreas$waldStat_3, decreasing=TRUE)



plotlist <- list()
lineages <- c("Neur", "Sus", "HBC")
for(ll in 1:3){
  curoo <- get(paste0("oo",lineages[ll]))
  curres <- get(paste0("asTestRes",lineages[ll],"Increas"))
  plothlp <- list()
  for(kk in 1:4){
    plothlp[[kk]] <- plotSmoothers(sce, counts, gene=rownames(curres)[curoo[kk]], alpha= 1/5, lwd=3/2, size=.1) +
      ggtitle(rownames(curres)[curoo[kk]]) +
      theme(legend.position = "none")
  }
  plotlist[[ll]] <- plothlp
}

pNeur <- cowplot::plot_grid(plotlist=plotlist[[1]])
pSus <- cowplot::plot_grid(plotlist=plotlist[[2]])
pHBC <- cowplot::plot_grid(plotlist=plotlist[[3]])
pNeur

pSus

pHBC

## legend
pLeg <- plotSmoothers(sce, counts, gene=rownames(curres)[curoo[kk]], alpha= 1/5, lwd=3/2) +
  scale_color_viridis_d(alpha = 1/5, labels = c("Neur", "Sus", "rHBC")) +
  guides(colour = guide_legend(override.aes = list(alpha = 1, size=1.5)))
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
pLeg

leg <- cowplot::get_legend(pLeg)


### in UMAP space
plotlistUMAP <- list()
for(ll in 1:3){
  curoo <- get(paste0("oo",lineages[ll]))
  curres <- get(paste0("asTestRes",lineages[ll],"Increas"))
  plothlp <- list()
  for(kk in 1:4){
    curGene <- rownames(curres)[curoo[kk]]
    y <- log1p(counts[curGene,])
    df <- data.frame(dim1=rd[,1],
                   dim2=rd[,2],
                   y=y)
    plothlp[[kk]] <- ggplot(df, aes(x=dim1, y=dim2, colour=y)) +
      geom_point(alpha=.3, size=1/2) +
      scale_color_gradient(low = "gray82", high = "steelblue4") +
      theme_classic() +
      ggtitle(curGene) +
      theme(legend.position = "none") +
      xlab("UMAP1") +
      ylab("UMAP3") +
      coord_fixed()
  }
  plotlistUMAP[[ll]] <- plothlp
}
pNeurUMAP <- cowplot::plot_grid(plotlist=plotlistUMAP[[1]])
pSusUMAP <- cowplot::plot_grid(plotlist=plotlistUMAP[[2]])
pHBCUMAP <- cowplot::plot_grid(plotlist=plotlistUMAP[[3]])
pNeurUMAP

pSusUMAP

pHBCUMAP

cowplot::plot_grid(pNeurUMAP, pSusUMAP, pHBCUMAP,
                   labels=letters[1:3], nrow=1, ncol=3)

ggsave("../plots/figure1Markers_umap.png", width=16, height=8)

Composite main plot

p1 <- cowplot::plot_grid(NULL, pMarker, nrow=1, ncol=2, labels=c("a", "b")) #empty space for pasting 3D trajectory
p2 <- cowplot::plot_grid(pNeur, pSus, pHBC, nrow=1, labels=c("c", "d", "e"))
p3 <- cowplot::plot_grid(p2, leg, rel_widths = c(0.9,0.1))
p4 <- cowplot::plot_grid(p1, 
                         p3,
                         nrow=2)
p4

ggsave("../plots/figure1.pdf", width=12, height=10)
ggsave("../plots/figure1.png", width=12, height=10)

Shared vs unique DE genes

library(UpSetR)
asTestRes2 <- tradeSeq::associationTest(sce, l2fc = log2(1.5), lineages = TRUE)

neurGenes <- names(sce)[which(p.adjust(asTestRes2$pvalue_1, "fdr") <= 0.05)]
susGenes <- names(sce)[which(p.adjust(asTestRes2$pvalue_2, "fdr") <= 0.05)]
hbcGenes <- names(sce)[which(p.adjust(asTestRes2$pvalue_3, "fdr") <= 0.05)]
geneList <- list("Neuron" = neurGenes,
                 "Sus" = susGenes,
                 "HBC" = hbcGenes)

png("../plots/fig1_upset.png", units="in", width=9, height=6, res=200)
upset(fromList(geneList), order.by = "freq")
dev.off()
## quartz_off_screen 
##                 2
allGenes <- sort(unique(c(neurGenes, susGenes, hbcGenes)))
sigMat <- matrix(0, nrow=length(allGenes), ncol=length(lineages),
                 dimnames = list(allGenes, lineages))
sigMat[neurGenes, "Neur"] <- 1
sigMat[susGenes, "Sus"] <- 1
sigMat[hbcGenes, "HBC"] <- 1
write.table(sigMat, file="../data/supplementary/significanceMatrix_associationTest_fig1.txt")

Session info

sessionInfo()
## R version 3.6.2 (2019-12-12)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.5
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] forcats_0.4.0               stringr_1.4.0              
##  [3] dplyr_1.0.3                 purrr_0.3.3                
##  [5] readr_1.3.1                 tidyr_1.0.2                
##  [7] tibble_2.1.3                tidyverse_1.3.0            
##  [9] scales_1.1.0                gridExtra_2.3              
## [11] UpSetR_1.4.0                wesanderson_0.3.6          
## [13] pheatmap_1.0.12             ggplot2_3.3.2              
## [15] aggregation_1.0.1           RColorBrewer_1.1-2         
## [17] clusterExperiment_2.6.1     bigmemory_4.5.36           
## [19] rgl_0.100.30                cowplot_1.0.0              
## [21] SingleCellExperiment_1.8.0  SummarizedExperiment_1.16.1
## [23] DelayedArray_0.12.2         BiocParallel_1.20.1        
## [25] matrixStats_0.56.0          Biobase_2.46.0             
## [27] GenomicRanges_1.38.0        GenomeInfoDb_1.22.0        
## [29] IRanges_2.20.2              S4Vectors_0.24.3           
## [31] BiocGenerics_0.32.0         tradeSeq_1.3.24            
## [33] slingshot_1.4.0             princurve_2.1.4            
## 
## loaded via a namespace (and not attached):
##   [1] tidyselect_1.1.0        RSQLite_2.2.0           AnnotationDbi_1.48.0   
##   [4] htmlwidgets_1.5.1       grid_3.6.2              combinat_0.0-8         
##   [7] docopt_0.6.1            Rtsne_0.15              RNeXML_2.4.2           
##  [10] munsell_0.5.0           codetools_0.2-16        miniUI_0.1.1.1         
##  [13] withr_2.1.2             colorspace_1.4-1        fastICA_1.2-2          
##  [16] knitr_1.28              uuid_0.1-2              rstudioapi_0.11        
##  [19] zinbwave_1.11.6         NMF_0.21.0              labeling_0.3           
##  [22] slam_0.1-47             GenomeInfoDbData_1.2.2  bit64_0.9-7            
##  [25] farver_2.0.3            rhdf5_2.30.1            rprojroot_1.3-2        
##  [28] vctrs_0.3.6             generics_0.0.2          xfun_0.12              
##  [31] R6_2.4.1                doParallel_1.0.15       VGAM_1.1-2             
##  [34] locfit_1.5-9.1          manipulateWidget_0.10.0 bitops_1.0-6           
##  [37] assertthat_0.2.1        promises_1.1.0          gtable_0.3.0           
##  [40] phylobase_0.8.6         rlang_0.4.10            genefilter_1.68.0      
##  [43] splines_3.6.2           lazyeval_0.2.2          broom_0.5.4            
##  [46] yaml_2.2.1              reshape2_1.4.3          modelr_0.1.5           
##  [49] crosstalk_1.0.0         backports_1.1.5         httpuv_1.5.2           
##  [52] tools_3.6.2             gridBase_0.4-7          ellipsis_0.3.0         
##  [55] Rcpp_1.0.6              plyr_1.8.5              progress_1.2.2         
##  [58] zlibbioc_1.32.0         RCurl_1.98-1.1          densityClust_0.3       
##  [61] prettyunits_1.1.1       pbapply_1.4-2           viridis_0.5.1          
##  [64] haven_2.2.0             ggrepel_0.8.1           cluster_2.1.0          
##  [67] fs_1.3.1                here_0.1                magrittr_1.5           
##  [70] RSpectra_0.16-0         reprex_0.3.0            RANN_2.6.1             
##  [73] hms_0.5.3               mime_0.9                evaluate_0.14          
##  [76] xtable_1.8-4            XML_3.99-0.3            sparsesvd_0.2          
##  [79] readxl_1.3.1            HSMMSingleCell_1.6.0    compiler_3.6.2         
##  [82] crayon_1.3.4            htmltools_0.5.1.1       mgcv_1.8-31            
##  [85] later_1.0.0             lubridate_1.7.4         howmany_0.3-1          
##  [88] DBI_1.1.0               dbplyr_1.4.2            MASS_7.3-51.4          
##  [91] Matrix_1.3-2            ade4_1.7-13             cli_2.0.1              
##  [94] igraph_1.2.4.2          pkgconfig_2.0.3         bigmemory.sri_0.1.3    
##  [97] rncl_0.8.3              registry_0.5-1          locfdr_1.1-8           
## [100] xml2_1.3.2              foreach_1.4.7           annotate_1.64.0        
## [103] rngtools_1.5            pkgmaker_0.31           webshot_0.5.2          
## [106] XVector_0.26.0          bibtex_0.4.2.2          rvest_0.3.5            
## [109] digest_0.6.27           DDRTree_0.1.5           softImpute_1.4         
## [112] rmarkdown_2.1           cellranger_1.1.0        edgeR_3.28.0           
## [115] kernlab_0.9-29          shiny_1.4.0             lifecycle_0.2.0        
## [118] monocle_2.14.0          nlme_3.1-142            jsonlite_1.6.1         
## [121] Rhdf5lib_1.8.0          fansi_0.4.1             viridisLite_0.3.0      
## [124] limma_3.42.1            pillar_1.4.3            lattice_0.20-38        
## [127] fastmap_1.0.1           httr_1.4.1              survival_3.1-8         
## [130] glue_1.4.2              qlcMatrix_0.9.7         FNN_1.1.3              
## [133] iterators_1.0.12        bit_1.1-15.2            stringi_1.4.5          
## [136] HDF5Array_1.14.1        blob_1.2.1              memoise_1.1.0          
## [139] irlba_2.3.3             ape_5.3